* Citations

*cd "C:\Users\federc\Dropbox\PATSTAT\DATA"
*global tmp "C:\Users\federc\Dropbox\TEMP"
*global data "D:\Patstat data"


* Machine-specific locations; edit these when running on another computer.
*   data : root folder holding the PATSTAT tables (tls201, tls211, tls212)
*   tmp  : scratch folder (not referenced in the visible part of this script)
* The working directory is where the output .dta files below are written.
global tmp "/tmp"
global data "/users/andreas/dropbox/work/patstat_data"
cd "/users/andreas/dropbox/work/patstat/data"

* ----------------------------------------------
* Obtain forward citations (from tls211, tls212 and tls201)
* In tls212, PAT_PUBLN_ID refers to the citing publication and
* CITED_PAT_PUBLN_ID refers to the publication being cited.
* Output: citations_by_appln_id (saved in the working directory) with
*   cited_appln_id, cited_pat_publn_id, yp, citing_appln_id, y
* ----------------------------------------------

* Start from the publication table; these rows play the role of the CITED side.
use "$data/tls211/tls211", clear
keep appln_id pat_publn_id publn_date

* Publication year = first four characters of publn_date.
gen yp = substr(publn_date, 1, 4)
destring yp, replace

* An application can have several publications: give every publication of
* the same application the earliest publication year. Sorting on yp within
* appln_id puts the smallest non-missing year first.
bysort appln_id (yp): replace yp = yp[1]
label var yp "earliest publication year of cited patent"
drop publn_date

ren appln_id cited_appln_id
ren pat_publn_id cited_pat_publn_id // Note: one application can have more than one publication

* Attach every citation received by each publication.
* keep(match) discards publications that are never cited (and citation rows
* whose cited publication is not in tls211), so no separate _merge filter
* is needed.
merge 1:m cited_pat_publn_id using "$data/tls212/tls212", ///
    keep(match) keepusing(pat_publn_id pat_citn_seq_nr) nogenerate

* NOTE(review): pat_citn_seq_nr is presumably the sequence number of the
* patent citation within the citing publication, with 0 marking rows that
* are not patent-to-patent citations - confirm against the PATSTAT catalog.
keep if pat_citn_seq_nr>0
drop pat_citn_seq_nr

* Translate the citing publication into the citing application.
* Citing publications without a row in tls211 are dropped.
merge m:1 pat_publn_id using "$data/tls211/tls211", ///
    keep(match) keepusing(appln_id) nogenerate
drop pat_publn_id
drop if cited_appln_id==0
duplicates drop

* Get application filing date of the citing application.
* The path is quoted so that a data folder containing spaces also works.
* Citing applications absent from tls201 are kept, with y missing.
merge m:1 appln_id using "$data/tls201/tls201", ///
    keep(match master) keepusing(appln_filing_date) nogenerate
gen y = substr(appln_filing_date, 1, 4)
destring y, replace
label var y "application filing year of citing patent"
drop appln_filing_date
ren appln_id citing_appln_id

compress
save citations_by_appln_id, replace
// Note: publication year=9999 means missing (unknown) publication year


* ----------------------------------------------
* Count citations per patent (application)
* Input : citations_by_appln_id (working directory)
* Output: forward_citations, one row per cited_appln_id with
*         citations and citations3year
* Note: Use a 3-year time window after the cited patent's publication year
* Note: for the moment we don't exclude self-citations
* NOTE(review): an earlier note said "keep patent applications with
*   publication date <= 2009"; no such filter is applied in this section.
* NOTE(review): a citing application that cites two publications of the same
*   cited application contributes two citations - confirm this is intended.
* ----------------------------------------------
use citations_by_appln_id, clear

* One row per (cited application, cited publication, citing application).
* y and yp are constant within this key, so which duplicate is kept is
* irrelevant.
duplicates drop cited_appln_id cited_pat_publn_id citing_appln_id, force

* Generate 3-year time window
// Note: window starts with cited patent publication year (yp) and is
// measured to the filing year of the citing application (y).
gen window = y - yp

* Year 9999 is the placeholder for an unknown date. Without the explicit
* exclusion, an unknown publication year (yp==9999) gives a large negative
* window and the citation would wrongly be counted as "within 3 years".
* A missing window (citing application without filing date) evaluates to
* false in window<4 because missing is larger than any number.
gen byte inwindow = (window<4) & yp!=9999 & y!=9999

* Diagnostic only: citing application filed before the cited publication
* year (this count also includes rows with yp==9999).
count if window<0 // About 3% of the sample

* Count number of (forward) citations per application
collapse (count) citations=citing_appln_id (sum) citations3year=inwindow, by(cited_appln_id)
label var citations "Total number of citations"
label var citations3year "Number of citations within 3 years (year t+1, t+2, t+3)"
saveold forward_citations, replace
